---
output: html_document
editor_options: 
  chunk_output_type: console
---

# Packages and data

```{r}

# Packages: tidyverse for data wrangling and plotting, binom for binomial
# confidence intervals.
library(tidyverse)
library(binom)

# Negated `%in%`: TRUE where the left-hand value is NOT in the right-hand set.
`%nin%` <- Negate(`%in%`)

# When run interactively in RStudio, resolve relative paths against the folder
# of the active document. The rstudioapi call is only valid inside a running
# RStudio session, so it is skipped elsewhere (e.g. when knitting) and whenever
# the active document has no path yet; the current working directory is then
# used as-is.
if (rstudioapi::isAvailable()) {
  doc_path <- rstudioapi::getActiveDocumentContext()$path
  if (length(doc_path) == 1 && nzchar(doc_path)) {
    setwd(dirname(doc_path))
  }
}

# Shared plot theme. The object is deliberately still called `theme`: later
# chunks add it to their plots with `+ theme`.
theme <- theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 12),
    panel.border = element_blank(),
    legend.position = "bottom",
    strip.background = element_blank(),
    legend.title = element_blank()
  )

# Output folder for the saved figures.
if (!dir.exists("figures")) dir.create("figures")

# Load tweets and classified tweets (500 each category).
# The classified file is read once and then split by topic flag.
tweets <- readRDS("tweets.rds")
tweets_classified <- readRDS("tweets_classified.rds")
tweets_bat <- tweets_classified %>% filter(bat > 0)
tweets_vit <- tweets_classified %>% filter(vit > 0)
tweets_flu <- tweets_classified %>% filter(flu > 0)
tweets_con <- tweets_classified %>% filter(conspiracy > 0)

# Load news articles and classified news articles (whole universe).
# Every article gets covid = 1 so it can be counted alongside the topic flags.
news <- readRDS("news.rds") %>% mutate(covid = 1)
news_classified <- readRDS("news_classified.rds")
news_bat <- news_classified %>% filter(bat > 0)
news_vit <- news_classified %>% filter(vit > 0)
news_flu <- news_classified %>% filter(flu > 0)
news_con <- news_classified %>% filter(conspiracy > 0)

```

## Twitter

```{r}

# Share of hand-coded tweets in `coded` whose `classification` is one of
# `codes`, returned as the single "wilson" row of binom.confint()
# (columns used downstream: mean, lower, upper).
tweet_share_ci <- function(coded, codes) {
  n_hits <- sum(coded$classification %in% codes)
  all_methods <- binom.confint(n_hits, nrow(coded))
  filter(all_methods, method == "wilson")
}

# Classification codes 1 and 4 feed the *_misinfo estimates.
tweets_bat_misinfo <- tweet_share_ci(tweets_bat, c(1, 4))
tweets_vit_misinfo <- tweet_share_ci(tweets_vit, c(1, 4))
tweets_flu_misinfo <- tweet_share_ci(tweets_flu, c(1, 4))
tweets_con_misinfo <- tweet_share_ci(tweets_con, c(1, 4))

# Classification code 2 feeds the *_combat estimates.
tweets_bat_combat <- tweet_share_ci(tweets_bat, 2)
tweets_vit_combat <- tweet_share_ci(tweets_vit, 2)
tweets_flu_combat <- tweet_share_ci(tweets_flu, 2)
tweets_con_combat <- tweet_share_ci(tweets_con, 2)

# Classification code 3 feeds the *_other estimates.
tweets_bat_other <- tweet_share_ci(tweets_bat, 3)
tweets_vit_other <- tweet_share_ci(tweets_vit, 3)
tweets_flu_other <- tweet_share_ci(tweets_flu, 3)
tweets_con_other <- tweet_share_ci(tweets_con, 3)

# Number of tweets in the study window with a positive flag, per flag column.
tw_in_window <- filter(
  tweets,
  created_at > "2020-03-26",
  created_at <= "2020-04-06"
)
tw_long <- pivot_longer(select(tw_in_window, status_id, covid:soc), cols = covid:soc)
tw_vals <- tw_long %>%
  group_by(name) %>%
  summarize(value = sum(as.numeric(value > 0)))

# Collect one field of the four per-topic interval rows into a vector indexed
# by topic name, so it can be looked up with the `name` column.
tw_by_topic <- function(bat, vit, flu, con, field) {
  c(
    bat = bat[[field]],
    vit = vit[[field]],
    flu = flu[[field]],
    conspiracy = con[[field]]
  )
}
tw_misinfo_mean <- tw_by_topic(tweets_bat_misinfo, tweets_vit_misinfo,
                               tweets_flu_misinfo, tweets_con_misinfo, "mean")
tw_misinfo_lower <- tw_by_topic(tweets_bat_misinfo, tweets_vit_misinfo,
                                tweets_flu_misinfo, tweets_con_misinfo, "lower")
tw_misinfo_upper <- tw_by_topic(tweets_bat_misinfo, tweets_vit_misinfo,
                                tweets_flu_misinfo, tweets_con_misinfo, "upper")
tw_combat_mean <- tw_by_topic(tweets_bat_combat, tweets_vit_combat,
                              tweets_flu_combat, tweets_con_combat, "mean")
tw_other_mean <- tw_by_topic(tweets_bat_other, tweets_vit_other,
                             tweets_flu_other, tweets_con_other, "mean")

# The four hand-coded topics appear three times (raw / combat / misinf);
# every other flag appears once as "uncoded".
tw_coded_topics <- c("bat", "flu", "vit", "conspiracy")
tw_coded <- filter(tw_vals, name %in% tw_coded_topics)
tw_uncoded <- filter(tw_vals, name %nin% tw_coded_topics)

tweets_vals <- bind_rows(
  mutate(tw_uncoded, type = "uncoded"),
  mutate(tw_coded, type = "raw"),
  mutate(tw_coded, type = "combat"),
  mutate(tw_coded, type = "misinf")
) %>%
  mutate(
    # Point estimate: raw count scaled by the coded share for that topic and
    # type ("raw" rows use the *_other share); uncoded rows keep the raw count.
    pe = case_when(
      type == "misinf" ~ value * unname(tw_misinfo_mean[name]),
      type == "combat" ~ value * unname(tw_combat_mean[name]),
      type == "raw" ~ value * unname(tw_other_mean[name]),
      TRUE ~ value
    ),
    # Interval bounds exist for the misinformation rows only.
    lb = case_when(
      type == "misinf" ~ value * unname(tw_misinfo_lower[name]),
      TRUE ~ NA_real_
    ),
    ub = case_when(
      type == "misinf" ~ value * unname(tw_misinfo_upper[name]),
      TRUE ~ NA_real_
    )
  ) %>%
  # Express everything as a fraction of the largest raw count.
  mutate(
    total = max(value),
    pe = pe / total,
    lb = lb / total,
    ub = ub / total
  )
```

## News

```{r}

# Share of hand-coded articles in `coded` whose `classified` value is one of
# `codes`, returned as the single "wilson" row of binom.confint().
news_share_ci <- function(coded, codes) {
  n_hits <- sum(coded$classified %in% codes)
  all_methods <- binom.confint(n_hits, nrow(coded))
  filter(all_methods, method == "wilson")
}

# Codes 1 and 4 feed the *_misinfo estimates.
news_bat_misinfo <- news_share_ci(news_bat, c(1, 4))
news_vit_misinfo <- news_share_ci(news_vit, c(1, 4))
news_flu_misinfo <- news_share_ci(news_flu, c(1, 4))
news_con_misinfo <- news_share_ci(news_con, c(1, 4))

# Code 2 feeds the *_combat estimates.
news_bat_combat <- news_share_ci(news_bat, 2)
news_vit_combat <- news_share_ci(news_vit, 2)
news_flu_combat <- news_share_ci(news_flu, 2)
news_con_combat <- news_share_ci(news_con, 2)

# Code 3 feeds the *_other estimates.
news_bat_other <- news_share_ci(news_bat, 3)
news_vit_other <- news_share_ci(news_vit, 3)
news_flu_other <- news_share_ci(news_flu, 3)
news_con_other <- news_share_ci(news_con, 3)

# Number of articles with a positive flag, per flag column, plus each count
# as a fraction of the largest count.
news_flags <- news %>%
  mutate(soc = as.numeric(soc > 0)) %>%
  ungroup() %>%
  select(title, bat:covid)
news_long <- pivot_longer(news_flags, cols = bat:covid)
news_vals <- news_long %>%
  group_by(name) %>%
  summarize(value = sum(as.numeric(value > 0))) %>%
  mutate(
    total = max(value),
    pe = value / total
  )

# Per-topic share estimates, indexed by topic name for lookup via `name`.
news_by_topic <- function(bat, vit, flu, con) {
  c(
    bat = bat$mean,
    vit = vit$mean,
    flu = flu$mean,
    conspiracy = con$mean
  )
}
news_misinfo_mean <- news_by_topic(news_bat_misinfo, news_vit_misinfo,
                                   news_flu_misinfo, news_con_misinfo)
news_combat_mean <- news_by_topic(news_bat_combat, news_vit_combat,
                                  news_flu_combat, news_con_combat)
news_other_mean <- news_by_topic(news_bat_other, news_vit_other,
                                 news_flu_other, news_con_other)

# The four hand-coded topics appear three times (raw / combat / misinf);
# every other flag appears once as "uncoded".
news_coded_topics <- c("bat", "flu", "vit", "conspiracy")
news_coded <- filter(news_vals, name %in% news_coded_topics)
news_uncoded <- filter(news_vals, name %nin% news_coded_topics)

news_vals <- bind_rows(
  mutate(news_uncoded, type = "uncoded"),
  mutate(news_coded, type = "raw"),
  mutate(news_coded, type = "combat"),
  mutate(news_coded, type = "misinf")
) %>%
  mutate(
    # Raw count scaled by the coded share for that topic and type ("raw" rows
    # use the *_other share); uncoded rows keep the raw count.
    pe = case_when(
      type == "misinf" ~ value * unname(news_misinfo_mean[name]),
      type == "combat" ~ value * unname(news_combat_mean[name]),
      type == "raw" ~ value * unname(news_other_mean[name]),
      TRUE ~ value
    )
  ) %>%
  # Express the estimate as a fraction of the largest raw count.
  mutate(
    total = max(value),
    pe = pe / total
  )
```

## Figure 1

```{r}
# Diverging palette; the fill scale below picks entries 1, 6 and 4 from it.
colors <- RColorBrewer::brewer.pal(8, "RdYlGn")

# Stack the Twitter and news summaries and attach display labels.
# news_vals has no lb/ub columns, so bind_rows() fills them with NA there.
ggdat <- bind_rows(
  tweets_vals %>% mutate(medium = "Twitter"),
  news_vals %>% mutate(medium = "News")
) %>%
  mutate(
    # Display label per row type; anything else (i.e. "raw") is "Unrelated".
    type = ordered(
      case_when(
        type == "misinf" ~ "Misinformation",
        type == "combat" ~ "Combatting misinformation",
        type == "uncoded" ~ "Public health\nrecommendations",
        TRUE ~ "Unrelated"
      ),
      levels = c(
        "Public health\nrecommendations", "Combatting misinformation",
        "Misinformation", "Unrelated"
      )
    ),
    # Facet label per flag. "Motta" and "Quarantine" are not among the levels,
    # so those rows (and unmatched flags such as covid) become NA and are
    # dropped by the filter below.
    category = ordered(
      case_when(
        name == "bat" ~ "Consumption of\nbats",
        name == "vit" ~ "Vitamin-C",
        name == "flu" ~ "No worse than\nthe flu",
        name == "soc" ~ "Social distancing",
        name == "motta" ~ "Motta",
        name == "conspiracy" ~ "Conspiracy",
        name == "qua" ~ "Quarantine",
        name == "hand" ~ "Hygiene"
      ),
      levels = c(
        "Vitamin-C", "Consumption of\nbats", "No worse than\nthe flu",
        "Conspiracy", "Hygiene", "Social distancing"
      )
    ),
    # Keyed on the raw flag name rather than the display label: the previous
    # label test looked for "Bat soup", which is never produced above, so the
    # bat topic was wrongly tagged as "Information".
    type_of_information = ifelse(
      name %in% c("bat", "vit", "flu", "conspiracy"),
      "Misinformation", "Information"
    )
  ) %>%
  filter(!is.na(category), type != "Unrelated")

# Stacked shares per medium, one facet per topic; error bars are drawn only
# where lb/ub are present.
fig1 <- ggdat %>%
  ggplot(aes(x = medium, y = pe, fill = type)) +
  geom_bar(stat = "identity", col = "black", position = "stack") +
  geom_errorbar(aes(ymin = lb, ymax = ub), width = 0, size = 0.7, color = "black") +
  theme +
  facet_wrap(~category, scales = "free") +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_manual(
    values = colors[c(1, 6, 4, 4)],
    breaks = c(
      "Misinformation", "Combatting misinformation", "Unrelated",
      "Public health\nrecommendations"
    )
  ) +
  labs(x = "", y = "Percentage of COVID-19 related content with\ndifferent information types")

fig1

# Pass the plot explicitly so the saved file does not depend on last_plot().
ggsave("figures/fig1.eps", plot = fig1, width = 7.5, height = 5)

```

## Supporting Figure 3

```{r}

# Count flagged tweets per flag column and period:
#   period 1  = after 2020-03-26 and up to 2020-04-06 (labelled "Study" below)
#   period 0  = on or before 2020-03-26               (labelled "Pre-")
#   period NA = everything else                       (labelled "Post-")
# `max` is the largest count within each period and is the denominator used
# further down.
tweets_vals2 <- tweets %>%
  mutate(period = case_when(
    created_at > "2020-03-26" & created_at <= "2020-04-06" ~ 1,
    created_at <= "2020-03-26" ~ 0,
    TRUE ~ NA_real_
  )) %>%
  select(status_id, period, covid:soc) %>%
  pivot_longer(cols = covid:soc) %>%
  group_by(name, period) %>%
  summarize(value = sum(ifelse(value > 0, 1, 0))) %>%
  group_by(period) %>%
  mutate(max = max(value))

# The four hand-coded topics appear three times (raw / combat / misinf); every
# other flag appears once as "uncoded". Counts are then scaled by the coded
# share estimates (tweets_*_misinfo / _combat / _other, built in the Twitter
# chunk); interval bounds exist for the misinformation rows only.
tweets_vals2 <- bind_rows(
  filter(tweets_vals2, name %nin% c("bat", "flu", "vit", "conspiracy")) %>% mutate(type = "uncoded"),
  filter(tweets_vals2, name %in% c("bat", "flu", "vit", "conspiracy")) %>% mutate(type = "raw"),
  filter(tweets_vals2, name %in% c("bat", "flu", "vit", "conspiracy")) %>% mutate(type = "combat"),
  filter(tweets_vals2, name %in% c("bat", "flu", "vit", "conspiracy")) %>% mutate(type = "misinf")
) %>%
  mutate(
    pe = case_when(
      name == "bat" & type == "misinf" ~ value * tweets_bat_misinfo$mean,
      name == "vit" & type == "misinf" ~ value * tweets_vit_misinfo$mean,
      name == "flu" & type == "misinf" ~ value * tweets_flu_misinfo$mean,
      name == "conspiracy" & type == "misinf" ~ value * tweets_con_misinfo$mean,
      name == "bat" & type == "combat" ~ value * tweets_bat_combat$mean,
      name == "vit" & type == "combat" ~ value * tweets_vit_combat$mean,
      name == "flu" & type == "combat" ~ value * tweets_flu_combat$mean,
      name == "conspiracy" & type == "combat" ~ value * tweets_con_combat$mean,
      # Remaining rows for the coded topics are the "raw" ones.
      name == "bat" ~ value * tweets_bat_other$mean,
      name == "vit" ~ value * tweets_vit_other$mean,
      name == "flu" ~ value * tweets_flu_other$mean,
      name == "conspiracy" ~ value * tweets_con_other$mean,
      TRUE ~ value
    ),
    lb = case_when(
      name == "bat" & type == "misinf" ~ value * tweets_bat_misinfo$lower,
      name == "vit" & type == "misinf" ~ value * tweets_vit_misinfo$lower,
      name == "flu" & type == "misinf" ~ value * tweets_flu_misinfo$lower,
      name == "conspiracy" & type == "misinf" ~ value * tweets_con_misinfo$lower,
      TRUE ~ NA_real_
    ),
    ub = case_when(
      name == "bat" & type == "misinf" ~ value * tweets_bat_misinfo$upper,
      name == "vit" & type == "misinf" ~ value * tweets_vit_misinfo$upper,
      name == "flu" & type == "misinf" ~ value * tweets_flu_misinfo$upper,
      name == "conspiracy" & type == "misinf" ~ value * tweets_con_misinfo$upper,
      TRUE ~ NA_real_
    )
  ) %>%
  # Fractions of the per-period maximum count.
  mutate(pe = pe / max, lb = lb / max, ub = ub / max)

# Attach display labels. Only Twitter data enters this figure, so the former
# single-input bind_rows() wrapper is dropped.
ggdat2 <- tweets_vals2 %>%
  mutate(medium = "Twitter") %>%
  ungroup() %>%
  mutate(
    period = ordered(
      case_when(
        period == 1 ~ "Study",
        period == 0 ~ "Pre-",
        TRUE ~ "Post-"
      ),
      levels = c("Pre-", "Study", "Post-")
    ),
    # Display label per row type; anything else (i.e. "raw") is "Unrelated".
    type = ordered(
      case_when(
        type == "misinf" ~ "Misinformation",
        type == "combat" ~ "Combatting misinformation",
        type == "uncoded" ~ "Public health\nrecommendations",
        TRUE ~ "Unrelated"
      ),
      levels = c(
        "Public health\nrecommendations", "Combatting misinformation",
        "Misinformation", "Unrelated"
      )
    ),
    # Facet label per flag; labels not listed in `levels` become NA and are
    # dropped by the filter below.
    category = ordered(
      case_when(
        name == "bat" ~ "Consumption of\nbats",
        name == "vit" ~ "Vitamin-C",
        name == "flu" ~ "No worse than\nthe flu",
        name == "soc" ~ "Social distancing",
        name == "motta" ~ "Motta",
        name == "conspiracy" ~ "Conspiracy",
        name == "qua" ~ "Quarantine",
        name == "hand" ~ "Hygiene"
      ),
      levels = c(
        "Vitamin-C", "Consumption of\nbats", "No worse than\nthe flu",
        "Conspiracy", "Hygiene", "Social distancing"
      )
    ),
    # Keyed on the raw flag name rather than the display label: the previous
    # label test looked for "Bat soup", which is never produced above, so the
    # bat topic was wrongly tagged as "Information".
    type_of_information = ifelse(
      name %in% c("bat", "vit", "flu", "conspiracy"),
      "Misinformation", "Information"
    )
  ) %>%
  filter(!is.na(category), type != "Unrelated")

# Stacked shares per period, one facet per topic. `colors` is the palette
# defined in the Figure 1 chunk.
sup_fig <- ggdat2 %>%
  ggplot(aes(x = period, y = pe, fill = type)) +
  geom_bar(stat = "identity", col = "black", position = "stack") +
  geom_errorbar(aes(ymin = lb, ymax = ub), width = 0, size = 0.7, color = "black") +
  theme +
  facet_wrap(~category, scales = "free") +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_manual(
    values = colors[c(1, 6, 4, 4)],
    breaks = c(
      "Misinformation", "Combatting misinformation", "Unrelated",
      "Public health\nrecommendations"
    )
  ) +
  labs(x = "", y = "Percentage of COVID-19 related content with\ndifferent information types") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

sup_fig

# Pass the plot explicitly so the saved file does not depend on last_plot().
# NOTE(review): the section heading says "Supporting Figure 3" but the file is
# named sup1.eps - confirm which is intended.
ggsave("figures/sup1.eps", plot = sup_fig, width = 7.5, height = 5)
```

## Figures 2, 3, Supporting Figure 2

```{r}

# Stata bridge: edit the binary path and the Stata version below to match the
# local installation before running this chunk.
library(RStata)
options(
  "RStata.StataPath" = "/Applications/Stata/StataMP.app/Contents/MacOS/stata-mp",
  "RStata.StataVersion" = 13
)
# NOTE(review): chooseStataBin() runs after the path option has been set;
# confirm whether it is still needed or overrides the value above.
chooseStataBin()

# Execute the survey analysis do-file in Stata.
stata("survey_analysis.do")

```

```{r}

```

